# Reset this number to 0 on major V8 upgrades.
# Increment by one for each non-official patch applied to deps/v8.
- 'v8_embedder_string': '-node.26',
+ 'v8_embedder_string': '-node.38',
##### V8 defaults for Node.js #####
!/third_party/googletest/src/googletest/include/gtest
/third_party/googletest/src/googletest/include/gtest/*
!/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
+!/third_party/rapidhash-v8
!/third_party/test262-harness
!/third_party/v8
!/third_party/wasm-api
default = False,
)
+v8_flag(
+ name = "v8_enable_seeded_array_index_hash",
+ default = False,
+)
+
v8_int(
name = "v8_typed_array_max_size_in_heap",
default = 64,
"v8_enable_verify_heap": "VERIFY_HEAP",
"v8_enable_verify_predictable": "VERIFY_PREDICTABLE",
"v8_enable_webassembly": "V8_ENABLE_WEBASSEMBLY",
+ "v8_enable_seeded_array_index_hash": "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH",
"v8_jitless": "V8_JITLESS",
},
defines = [
"src/numbers/conversions-inl.h",
"src/numbers/conversions.cc",
"src/numbers/conversions.h",
+ "src/numbers/hash-seed.h",
+ "src/numbers/hash-seed.cc",
"src/numbers/hash-seed-inl.h",
"src/numbers/integer-literal-inl.h",
"src/numbers/integer-literal.h",
"src/execution/pointer-authentication-dummy.h",
"src/heap/third-party/heap-api.h",
"src/heap/third-party/heap-api-stub.cc",
+ "third_party/rapidhash-v8/rapidhash.h",
+ "third_party/rapidhash-v8/secret.h",
] + select({
"@v8//bazel/config:v8_target_ia32": [
"src/baseline/ia32/baseline-assembler-ia32-inl.h",
# iOS (non-simulator) does not have executable pages for 3rd party
# applications yet so disable jit.
v8_jitless = v8_enable_lite_mode || target_is_ios_device
+
+  # Use the hard-coded (compile-time) hasher secret instead of deriving one
+  # from the hash seed at startup.
+ v8_use_default_hasher_secret = true
+
+ # Enable seeded array index hash.
+ v8_enable_seeded_array_index_hash = false
}
# Derived defaults.
# List of defines that can appear in externally visible header files and that
# are controlled by args.gn.
external_v8_defines = [
+ "V8_USE_DEFAULT_HASHER_SECRET=${v8_use_default_hasher_secret}",
"V8_ENABLE_CHECKS",
"V8_COMPRESS_POINTERS",
"V8_COMPRESS_POINTERS_IN_SHARED_CAGE",
"V8_ENABLE_CONSERVATIVE_STACK_SCANNING",
]
-enabled_external_v8_defines = []
+enabled_external_v8_defines = [
+ "V8_USE_DEFAULT_HASHER_SECRET=${v8_use_default_hasher_secret}",
+]
if (v8_enable_v8_checks) {
enabled_external_v8_defines += [ "V8_ENABLE_CHECKS" ]
if (v8_enable_lite_mode) {
defines += [ "V8_LITE_MODE" ]
}
+ if (v8_enable_seeded_array_index_hash) {
+ defines += [ "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH" ]
+ }
if (v8_enable_gdbjit) {
defines += [ "ENABLE_GDB_JIT_INTERFACE" ]
}
"src/numbers/conversions-inl.h",
"src/numbers/conversions.h",
"src/numbers/hash-seed-inl.h",
+ "src/numbers/hash-seed.h",
"src/numbers/math-random.h",
"src/objects/all-objects-inl.h",
"src/objects/allocation-site-inl.h",
"src/temporal/temporal-parser.h",
"src/third_party/siphash/halfsiphash.h",
"src/third_party/utf8-decoder/utf8-decoder.h",
+ "third_party/rapidhash-v8/rapidhash.h",
+ "third_party/rapidhash-v8/secret.h",
"src/torque/runtime-macro-shims.h",
"src/tracing/trace-event.h",
"src/tracing/traced-value.h",
"src/logging/runtime-call-stats.cc",
"src/logging/tracing-flags.cc",
"src/numbers/conversions.cc",
+ "src/numbers/hash-seed.cc",
"src/numbers/math-random.cc",
"src/objects/backing-store.cc",
"src/objects/bigint.cc",
],
"etw-jit-metadata-win\.h": [
"+src/libplatform/etw/etw-provider-win.h",
- ]
+ ],
+ "string-hasher-inl\.h": [
+ "+third_party/rapidhash-v8/rapidhash.h",
+ ],
+ "heap\.cc": [
+ "+third_party/rapidhash-v8/secret.h",
+ ],
+ "hash-seed\.cc": [
+ "+third_party/rapidhash-v8/secret.h",
+ ],
}
// can't be convertible to an array index.
if (!IsIntegerIndex()) return false;
if (length() <= Name::kMaxCachedArrayIndexLength) {
- *index = Name::ArrayIndexValueBits::decode(raw_hash_field_);
+ *index = StringHasher::DecodeArrayIndexFromHashField(
+ raw_hash_field_, HashSeed(ReadOnlyHeap::GetReadOnlyRoots()));
return true;
}
// Might be an index, but too big to cache it. Do the slow conversion. This
return result;
}
-AstStringConstants::AstStringConstants(Isolate* isolate, uint64_t hash_seed)
+AstStringConstants::AstStringConstants(Isolate* isolate,
+ const HashSeed hash_seed)
: zone_(isolate->allocator(), ZONE_NAME),
string_table_(),
hash_seed_(hash_seed) {
#include "src/common/globals.h"
#include "src/handles/handles.h"
#include "src/numbers/conversions.h"
+#include "src/numbers/hash-seed.h"
#include "src/objects/name.h"
// Ast(Raw|Cons)String and AstValueFactory are for storing strings and
class AstStringConstants final {
public:
- AstStringConstants(Isolate* isolate, uint64_t hash_seed);
+ AstStringConstants(Isolate* isolate, const HashSeed hash_seed);
AstStringConstants(const AstStringConstants&) = delete;
AstStringConstants& operator=(const AstStringConstants&) = delete;
AST_STRING_CONSTANTS(F)
#undef F
- uint64_t hash_seed() const { return hash_seed_; }
+ const HashSeed hash_seed() const { return hash_seed_; }
const AstRawStringMap* string_table() const { return &string_table_; }
private:
Zone zone_;
AstRawStringMap string_table_;
- uint64_t hash_seed_;
+ const HashSeed hash_seed_;
#define F(name, str) AstRawString* name##_string_;
AST_STRING_CONSTANTS(F)
class AstValueFactory {
public:
AstValueFactory(Zone* zone, const AstStringConstants* string_constants,
- uint64_t hash_seed)
+ const HashSeed hash_seed)
: AstValueFactory(zone, zone, string_constants, hash_seed) {}
AstValueFactory(Zone* ast_raw_string_zone, Zone* single_parse_zone,
const AstStringConstants* string_constants,
- uint64_t hash_seed)
+ const HashSeed hash_seed)
: string_table_(string_constants->string_table()),
strings_(nullptr),
strings_end_(&strings_),
Zone* ast_raw_string_zone_;
Zone* single_parse_zone_;
- uint64_t hash_seed_;
+ const HashSeed hash_seed_;
};
extern template EXPORT_TEMPLATE_DECLARE(
const hash: NameHash = s.raw_hash_field;
if (IsIntegerIndex(hash) &&
hash.array_index_length < kMaxCachedArrayIndexLength) {
- const arrayIndex: uint32 = hash.array_index_value;
+ const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash);
return SmiFromUint32(arrayIndex);
}
// Fall back to the runtime to convert string to a number.
const hash: NameHash = s.raw_hash_field;
if (IsIntegerIndex(hash) &&
hash.array_index_length < kMaxCachedArrayIndexLength) {
- const arrayIndex: uint32 = hash.array_index_value;
+ const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash);
return SmiFromUint32(arrayIndex);
}
// Fall back to the runtime.
return var_hash.value();
}
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+// Mirror C++ StringHasher::SeedArrayIndexValue.
+// Scrambles an array-index value with a 3-round xorshift-multiply keyed by
+// the seed-derived multipliers m1..m3, masking back into
+// Name::kArrayIndexValueMask after each multiply. Must stay in sync with the
+// C++ implementation and be the exact inverse of UnseedArrayIndexValue below.
+TNode<Uint32T> CodeStubAssembler::SeedArrayIndexValue(TNode<Uint32T> value) {
+  // Load m1, m2 and m3 from the hash seed byte array. In the compiled code
+  // these will always come from the read-only roots.
+  TNode<ByteArray> hash_seed = CAST(LoadRoot(RootIndex::kHashSeed));
+  // Offset from the tagged ByteArray pointer to its first data byte.
+  intptr_t base_offset = ByteArray::kHeaderSize - kHeapObjectTag;
+  TNode<Uint32T> m1 = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1Offset));
+  TNode<Uint32T> m2 = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2Offset));
+  TNode<Uint32T> m3 = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM3Offset));
+
+  TNode<Word32T> x = value;
+  // 3-round xorshift-multiply.
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m1),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m2),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m3),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+
+  return Unsigned(x);
+}
+
+// Mirror C++ StringHasher::UnseedArrayIndexValue.
+// Exact inverse of SeedArrayIndexValue: applies the inverse multipliers
+// m3_inv, m2_inv, m1_inv in reverse order. The xorshift rounds need no
+// separate inverse because each one is its own inverse here (see note below).
+TNode<Uint32T> CodeStubAssembler::UnseedArrayIndexValue(TNode<Uint32T> value) {
+  // Load m1_inv, m2_inv and m3_inv from the hash seed byte array. In the
+  // compiled code these will always come from the read-only roots.
+  TNode<ByteArray> hash_seed = CAST(LoadRoot(RootIndex::kHashSeed));
+  intptr_t base_offset = ByteArray::kHeaderSize - kHeapObjectTag;
+  TNode<Uint32T> m1_inv = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1InvOffset));
+  TNode<Uint32T> m2_inv = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2InvOffset));
+  TNode<Uint32T> m3_inv = Load<Uint32T>(
+      hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM3InvOffset));
+
+  TNode<Word32T> x = value;
+  // 3-round xorshift-multiply (inverse).
+  // Xorshift is an involution when kShift is at least half of the value width.
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m3_inv),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m2_inv),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  x = Word32And(Uint32Mul(Unsigned(x), m1_inv),
+                Uint32Constant(Name::kArrayIndexValueMask));
+  x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
+  return Unsigned(x);
+}
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
TNode<Uint32T> CodeStubAssembler::LoadNameHashAssumeComputed(TNode<Name> name) {
TNode<Uint32T> hash_field = LoadNameRawHash(name);
CSA_DCHECK(this, IsClearWord32(hash_field, Name::kHashNotComputedMask));
GotoIf(IsSetWord32(raw_hash_field, Name::kDoesNotContainCachedArrayIndexMask),
&runtime);
- var_result = SmiTag(Signed(
- DecodeWordFromWord32<String::ArrayIndexValueBits>(raw_hash_field)));
+ var_result = SmiFromUint32(DecodeArrayIndexFromHashField(raw_hash_field));
Goto(&end);
BIND(&runtime);
BIND(&if_has_cached_index);
{
- TNode<IntPtrT> index = Signed(
- DecodeWordFromWord32<String::ArrayIndexValueBits>(raw_hash_field));
+ TNode<IntPtrT> index = Signed(ChangeUint32ToWord(
+ DecodeArrayIndexFromHashField(raw_hash_field)));
CSA_DCHECK(this, IntPtrLessThan(index, IntPtrConstant(INT_MAX)));
*var_index = index;
Goto(if_keyisindex);
TNode<Uint8T> property_details,
Label* needs_resize);
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+ // Mirror C++ StringHasher::SeedArrayIndexValue and UnseedArrayIndexValue.
+ TNode<Uint32T> SeedArrayIndexValue(TNode<Uint32T> value);
+ TNode<Uint32T> UnseedArrayIndexValue(TNode<Uint32T> value);
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
private:
friend class CodeStubArguments;
static uint32_t ComputeSeededIntegerHash(Isolate* isolate, int32_t key) {
DisallowGarbageCollection no_gc;
- return ComputeSeededHash(static_cast<uint32_t>(key), HashSeed(isolate));
+ return ComputeSeededHash(static_cast<uint32_t>(key),
+ HashSeed(isolate).seed());
}
FUNCTION_REFERENCE(compute_integer_hash, ComputeSeededIntegerHash)
wire_bytes.GetFunctionBytes(&func);
// TODO(herhut): Maybe also take module, name and signature into account.
return i::StringHasher::HashSequentialString(function_bytes.begin(),
- function_bytes.length(), 0);
+ function_bytes.length(),
+ internal::HashSeed::Default());
}
int WasmScript::CodeOffset() const {
template <typename Impl>
Handle<ByteArray> FactoryBase<Impl>::NewByteArray(int length,
- AllocationType allocation) {
+ AllocationType allocation,
+ AllocationAlignment alignment) {
if (length < 0 || length > ByteArray::kMaxLength) {
FATAL("Fatal JavaScript invalid size error %d", length);
UNREACHABLE();
if (length == 0) return impl()->empty_byte_array();
int size = ALIGN_TO_ALLOCATION_ALIGNMENT(ByteArray::SizeFor(length));
HeapObject result = AllocateRawWithImmortalMap(
- size, allocation, read_only_roots().byte_array_map());
+ size, allocation, read_only_roots().byte_array_map(), alignment);
DisallowGarbageCollection no_gc;
ByteArray array = ByteArray::cast(result);
array.set_length(length);
if (raw.raw_hash_field() == String::kEmptyHashField &&
number.value() >= 0) {
uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(
- static_cast<uint32_t>(number.value()), raw.length());
+ static_cast<uint32_t>(number.value()), raw.length(),
+ HashSeed(read_only_roots()));
raw.set_raw_hash_field(raw_hash_field);
}
}
template <typename Impl>
HeapObject FactoryBase<Impl>::AllocateRawArray(int size,
- AllocationType allocation) {
- HeapObject result = AllocateRaw(size, allocation);
+ AllocationType allocation,
+ AllocationAlignment alignment) {
+ HeapObject result = AllocateRaw(size, allocation, alignment);
if (!V8_ENABLE_THIRD_PARTY_HEAP_BOOL &&
(size >
isolate()->heap()->AsHeap()->MaxRegularHeapObjectSize(allocation)) &&
// The function returns a pre-allocated empty byte array for length = 0.
Handle<ByteArray> NewByteArray(
- int length, AllocationType allocation = AllocationType::kYoung);
+ int length, AllocationType allocation = AllocationType::kYoung,
+ AllocationAlignment alignment = kTaggedAligned);
Handle<BytecodeArray> NewBytecodeArray(int length, const byte* raw_bytecodes,
int frame_size, int parameter_count,
static constexpr int kNumberToStringBufferSize = 32;
// Allocate memory for an uninitialized array (e.g., a FixedArray or similar).
- HeapObject AllocateRawArray(int size, AllocationType allocation);
+ HeapObject AllocateRawArray(int size, AllocationType allocation,
+ AllocationAlignment alignment = kTaggedAligned);
HeapObject AllocateRawFixedArray(int length, AllocationType allocation);
HeapObject AllocateRawWeakArrayList(int length, AllocationType allocation);
if (value <= JSArray::kMaxArrayIndex &&
raw.raw_hash_field() == String::kEmptyHashField) {
uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(
- static_cast<uint32_t>(value), raw.length());
+ static_cast<uint32_t>(value), raw.length(),
+ HashSeed(read_only_roots()));
raw.set_raw_hash_field(raw_hash_field);
}
}
heap_allocator_.Setup();
}
-void Heap::InitializeHashSeed() {
- DCHECK(!deserialization_complete_);
- uint64_t new_hash_seed;
- if (v8_flags.hash_seed == 0) {
- int64_t rnd = isolate()->random_number_generator()->NextInt64();
- new_hash_seed = static_cast<uint64_t>(rnd);
- } else {
- new_hash_seed = static_cast<uint64_t>(v8_flags.hash_seed);
- }
- ReadOnlyRoots(this).hash_seed().copy_in(
- 0, reinterpret_cast<byte*>(&new_hash_seed), kInt64Size);
-}
-
// static
void Heap::InitializeOncePerProcess() {
#ifdef V8_ENABLE_ALLOCATION_TIMEOUT
// Prepares the heap, setting up for deserialization.
void InitializeMainThreadLocalHeap(LocalHeap* main_thread_local_heap);
- // (Re-)Initialize hash seed from flag or RNG.
- void InitializeHashSeed();
-
// Invoked once for the process from V8::Initialize.
static void InitializeOncePerProcess();
// static
ReadOnlyRoots ReadOnlyHeap::GetReadOnlyRoots(HeapObject object) {
#ifdef V8_SHARED_RO_HEAP
+ return GetReadOnlyRoots();
+#else
+ return ReadOnlyRoots(GetHeapFromWritableObject(object));
+#endif // V8_SHARED_RO_HEAP
+}
+
+#ifdef V8_SHARED_RO_HEAP
+ReadOnlyRoots ReadOnlyHeap::GetReadOnlyRoots() {
auto* shared_ro_heap = SoleReadOnlyHeap::shared_ro_heap_;
// If this check fails in code that runs during initialization use
// EarlyGetReadOnlyRoots instead.
DCHECK(shared_ro_heap && shared_ro_heap->roots_init_complete());
return ReadOnlyRoots(shared_ro_heap->read_only_roots_);
-#else
- return ReadOnlyRoots(GetHeapFromWritableObject(object));
-#endif // V8_SHARED_RO_HEAP
}
+#endif // V8_SHARED_RO_HEAP
} // namespace internal
} // namespace v8
// must be initialized
V8_EXPORT_PRIVATE inline static ReadOnlyRoots GetReadOnlyRoots(
HeapObject object);
+
+#ifdef V8_SHARED_RO_HEAP
+ V8_EXPORT_PRIVATE inline static ReadOnlyRoots GetReadOnlyRoots();
+#endif // V8_SHARED_RO_HEAP
+
// Returns the current isolates roots table during initialization as opposed
// to the shared one in case the latter is not initialized yet.
V8_EXPORT_PRIVATE inline static ReadOnlyRoots EarlyGetReadOnlyRoots(
#include "src/init/heap-symbols.h"
#include "src/init/setup-isolate.h"
#include "src/interpreter/interpreter.h"
+#include "src/numbers/hash-seed.h"
#include "src/objects/arguments.h"
#include "src/objects/call-site-info.h"
#include "src/objects/cell-inl.h"
// Hash seed for strings
Factory* factory = isolate()->factory();
- set_hash_seed(*factory->NewByteArray(kInt64Size, AllocationType::kReadOnly));
- InitializeHashSeed();
+ set_hash_seed(*factory->NewByteArray(HashSeed::kTotalSize,
+ AllocationType::kReadOnly,
+ AllocationAlignment::kDoubleAligned));
+ HashSeed::InitializeRoots(isolate());
// Important strings and symbols
for (const ConstantStringInit& entry : kImportantConstantStringTable) {
template <typename Char>
JsonParser<Char>::JsonParser(Isolate* isolate, Handle<String> source)
: isolate_(isolate),
- hash_seed_(HashSeed(isolate)),
object_constructor_(isolate_->object_function()),
original_source_(source) {
size_t start = 0;
int position() const { return static_cast<int>(cursor_ - chars_); }
Isolate* isolate_;
- const uint64_t hash_seed_;
JsonToken next_;
// Indicates whether the bytes underneath source_ can relocate during GC.
bool chars_may_relocate_;
#ifndef V8_NUMBERS_HASH_SEED_INL_H_
#define V8_NUMBERS_HASH_SEED_INL_H_
-#include <stdint.h>
-
-// The #includes below currently lead to cyclic transitive includes, so
-// HashSeed() ends up being required before it is defined, so we have to
-// declare it here. This is a workaround; if we needed this permanently then
-// we should put that line into a "hash-seed.h" header; but we won't need
-// it for long.
-// TODO(jkummerow): Get rid of this by breaking circular include dependencies.
-namespace v8 {
-namespace internal {
-
-class Isolate;
-class LocalIsolate;
-class ReadOnlyRoots;
-
-inline uint64_t HashSeed(Isolate* isolate);
-inline uint64_t HashSeed(LocalIsolate* isolate);
-inline uint64_t HashSeed(ReadOnlyRoots roots);
-
-} // namespace internal
-} // namespace v8
-
-// See comment above for why this isn't at the top of the file.
+#include "src/numbers/hash-seed.h"
#include "src/objects/fixed-array-inl.h"
#include "src/roots/roots-inl.h"
namespace v8 {
namespace internal {
-inline uint64_t HashSeed(Isolate* isolate) {
- return HashSeed(ReadOnlyRoots(isolate));
-}
+inline HashSeed::HashSeed(Isolate* isolate)
+    : HashSeed(ReadOnlyRoots(isolate)) {}
+
+inline HashSeed::HashSeed(LocalIsolate* isolate)
+    : HashSeed(ReadOnlyRoots(isolate)) {}
+
+// Views the hash_seed ByteArray payload in-place as a Data struct; the view
+// stays valid for the lifetime of the read-only roots. The reinterpret_cast
+// relies on the hash_seed root being allocated double-aligned (see the
+// kDoubleAligned NewByteArray call in roots setup).
+inline HashSeed::HashSeed(ReadOnlyRoots roots)
+    : data_(reinterpret_cast<const Data*>(
+          roots.hash_seed().GetDataStartAddress())) {}
+
+// Returns a view over the static default data (meta seed 0, default
+// rapidhash secret); usable without an isolate.
+inline HashSeed HashSeed::Default() { return HashSeed(kDefaultData); }
-inline uint64_t HashSeed(LocalIsolate* isolate) {
- return HashSeed(ReadOnlyRoots(isolate));
-}
+inline uint64_t HashSeed::seed() const { return data_->seed; }
+inline const uint64_t* HashSeed::secret() const { return data_->secrets; }
-inline uint64_t HashSeed(ReadOnlyRoots roots) {
- uint64_t seed;
- roots.hash_seed().copy_out(0, reinterpret_cast<byte*>(&seed), kInt64Size);
- return seed;
-}
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+inline uint32_t HashSeed::m1() const { return data_->m1; }
+inline uint32_t HashSeed::m1_inv() const { return data_->m1_inv; }
+inline uint32_t HashSeed::m2() const { return data_->m2; }
+inline uint32_t HashSeed::m2_inv() const { return data_->m2_inv; }
+inline uint32_t HashSeed::m3() const { return data_->m3; }
+inline uint32_t HashSeed::m3_inv() const { return data_->m3_inv; }
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
} // namespace internal
} // namespace v8
--- /dev/null
+// Copyright 2026 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/numbers/hash-seed.h"
+
+#include <cstddef>
+
+#include "src/execution/isolate.h"
+#include "src/flags/flags.h"
+#include "src/numbers/hash-seed-inl.h"
+#include "src/objects/name.h"
+#include "third_party/rapidhash-v8/secret.h"
+
+namespace v8 {
+namespace internal {
+
+namespace {
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+// Calculate the modular inverse using Newton's method (Hensel lifting):
+// each iteration doubles the number of correct low-order bits of the
+// 2-adic inverse, so three iterations from the 5-bit-correct seed value
+// cover all 32 bits. Requires m to be odd (even numbers have no inverse
+// modulo a power of two); callers guarantee this via derive_multiplier().
+constexpr uint32_t modular_inverse(uint32_t m) {
+  uint32_t x = (3 * m) ^ 2;  // 5 correct bits
+  x = x * (2 - m * x);       // 10 correct bits
+  x = x * (2 - m * x);       // 20 correct bits
+  x = x * (2 - m * x);       // 40 correct bits
+  return x;
+}
+
+// Truncate a 64-bit rapidhash secret to the low Name::kArrayIndexValueBits
+// bits, the width of the array-index value field in the hash.
+constexpr uint32_t truncate_for_derived_secrets(uint64_t s) {
+  return static_cast<uint32_t>(s) & Name::kArrayIndexValueMask;
+}
+
+// Derive a multiplier from a rapidhash secret and ensure it's odd.
+// Oddness makes the multiplier invertible mod 2^kArrayIndexValueBits.
+constexpr uint32_t derive_multiplier(uint64_t secret) {
+  return truncate_for_derived_secrets(secret) | 1;
+}
+
+// Compute the modular inverse of the derived multiplier. Truncating the
+// 32-bit inverse to kArrayIndexValueBits yields the inverse in the smaller
+// ring, since the low bits of a 2-adic inverse are themselves an inverse.
+constexpr uint32_t derive_multiplier_inverse(uint64_t secret) {
+  return truncate_for_derived_secrets(
+      modular_inverse(derive_multiplier(secret)));
+}
+
+// Check that m_inv really is the multiplicative inverse of m, i.e.
+// m * m_inv == 1 (mod 2^kArrayIndexValueBits).
+constexpr bool is_modular_inverse(uint32_t m, uint32_t m_inv) {
+  return ((m * m_inv) & Name::kArrayIndexValueMask) == 1;
+}
+
+// Fill in the m1..m3 multipliers (and their modular inverses) used by the
+// seeded array-index hash, each derived from one of the three rapidhash
+// secrets in `data`.
+constexpr void DeriveSecretsForArrayIndexHash(HashSeed::Data* data) {
+  data->m1 = derive_multiplier(data->secrets[0]);
+  data->m1_inv = derive_multiplier_inverse(data->secrets[0]);
+  data->m2 = derive_multiplier(data->secrets[1]);
+  data->m2_inv = derive_multiplier_inverse(data->secrets[1]);
+  data->m3 = derive_multiplier(data->secrets[2]);
+  data->m3_inv = derive_multiplier_inverse(data->secrets[2]);
+}
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
+// Compile-time default seed data: meta seed 0, the rapidhash default secret,
+// and (when enabled) the multipliers derived from that secret. Backs
+// HashSeed::Default() for callers that have no isolate.
+static constexpr HashSeed::Data kDefaultSeed = [] {
+  HashSeed::Data d{};
+  d.seed = 0;
+  d.secrets[0] = RAPIDHASH_DEFAULT_SECRET[0];
+  d.secrets[1] = RAPIDHASH_DEFAULT_SECRET[1];
+  d.secrets[2] = RAPIDHASH_DEFAULT_SECRET[2];
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  DeriveSecretsForArrayIndexHash(&d);
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  return d;
+}();
+
+} // anonymous namespace
+
+static_assert(HashSeed::kSecretsCount == arraysize(RAPIDHASH_DEFAULT_SECRET));
+const HashSeed::Data* const HashSeed::kDefaultData = &kDefaultSeed;
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+// Compile-time verification that m * m_inv === 1 for the derived secrets.
+static_assert(is_modular_inverse(kDefaultSeed.m1, kDefaultSeed.m1_inv));
+static_assert(is_modular_inverse(kDefaultSeed.m2, kDefaultSeed.m2_inv));
+static_assert(is_modular_inverse(kDefaultSeed.m3, kDefaultSeed.m3_inv));
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
+// static
+// Chooses the 64-bit meta seed -- from --hash-seed, or the isolate's RNG
+// when the flag is 0 -- and writes it, together with the rapidhash secrets
+// and any derived array-index multipliers, into the hash_seed ByteArray in
+// the isolate's read-only roots. The DCHECK pins this to run before
+// deserialization completes, i.e. while those roots can still be written.
+void HashSeed::InitializeRoots(Isolate* isolate) {
+  DCHECK(!isolate->heap()->deserialization_complete());
+  uint64_t seed;
+  if (v8_flags.hash_seed == 0) {
+    int64_t rnd = isolate->random_number_generator()->NextInt64();
+    seed = static_cast<uint64_t>(rnd);
+  } else {
+    seed = static_cast<uint64_t>(v8_flags.hash_seed);
+  }
+
+  // Write the seed and derived secrets into the read-only roots ByteArray.
+  // The const_cast is deliberate: data_ points into the isolate's own roots
+  // array, which is mutable at this point in initialization (see DCHECK).
+  Data* data = const_cast<Data*>(HashSeed(isolate).data_);
+
+#if V8_USE_DEFAULT_HASHER_SECRET
+  // Copy from the default seed and just override the meta seed.
+  *data = kDefaultSeed;
+  data->seed = seed;
+#else
+  data->seed = seed;
+  rapidhash_make_secret(seed, data->secrets);
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  DeriveSecretsForArrayIndexHash(data);
+  DCHECK(is_modular_inverse(data->m1, data->m1_inv));
+  DCHECK(is_modular_inverse(data->m2, data->m2_inv));
+  DCHECK(is_modular_inverse(data->m3, data->m3_inv));
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+#endif  // V8_USE_DEFAULT_HASHER_SECRET
+}
+
+} // namespace internal
+} // namespace v8
--- /dev/null
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_NUMBERS_HASH_SEED_H_
+#define V8_NUMBERS_HASH_SEED_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "src/base/macros.h"
+
+namespace v8 {
+namespace internal {
+
+class Isolate;
+class LocalIsolate;
+class ReadOnlyRoots;
+
+// A lightweight view over the hash_seed ByteArray in read-only roots.
+// A lightweight view over the hash_seed ByteArray in read-only roots.
+// Instances are cheap to copy: each one holds only a pointer to the
+// underlying Data, which lives either in the read-only roots or in a
+// static default.
+class V8_EXPORT_PRIVATE HashSeed {
+ public:
+  inline explicit HashSeed(Isolate* isolate);
+  inline explicit HashSeed(LocalIsolate* isolate);
+  inline explicit HashSeed(ReadOnlyRoots roots);
+
+  // Returns a view over the static default data (meta seed 0, default
+  // rapidhash secret); usable without an isolate.
+  static inline HashSeed Default();
+
+  inline uint64_t seed() const;
+  inline const uint64_t* secret() const;
+
+  // NOTE: compares the backing-data pointers, not the byte contents; two
+  // seeds over identical data at different addresses compare unequal.
+  bool operator==(const HashSeed& b) const { return data_ == b.data_; }
+
+  static constexpr int kSecretsCount = 3;
+
+  // The ReadOnlyRoots::hash_seed() byte array can be interpreted
+  // as a HashSeed::Data struct.
+  // Since this maps over either the read-only roots or over a static byte
+  // array, and in both cases, must be allocated at 8-byte boundaries,
+  // we don't use V8_OBJECT here.
+  struct Data {
+    // Meta seed from --hash-seed; 0 means "generate at startup".
+    uint64_t seed;
+    // When V8_USE_DEFAULT_HASHER_SECRET is enabled, these are just
+    // RAPIDHASH_DEFAULT_SECRET. Otherwise they are derived from the seed
+    // using rapidhash_make_secret().
+    uint64_t secrets[kSecretsCount];
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+    // Additional precomputed secrets for seeding the array index value hashes.
+    uint32_t m1;  // lower kArrayIndexValueBits bits of secret[0], must be odd
+    uint32_t m1_inv;  // modular inverse of m1 mod 2^kArrayIndexValueBits
+    uint32_t m2;  // lower kArrayIndexValueBits bits of secret[1], must be odd
+    uint32_t m2_inv;  // modular inverse of m2 mod 2^kArrayIndexValueBits
+    uint32_t m3;  // lower kArrayIndexValueBits bits of secret[2], must be odd
+    uint32_t m3_inv;  // modular inverse of m3 mod 2^kArrayIndexValueBits
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  };
+
+  // Size in bytes of the backing ByteArray allocated in read-only space.
+  static constexpr int kTotalSize = sizeof(Data);
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  // Byte offsets from the data start, for CSA that loads fields at raw
+  // offsets from the ByteArray data start.
+  static constexpr int kDerivedM1Offset = offsetof(Data, m1);
+  static constexpr int kDerivedM1InvOffset = offsetof(Data, m1_inv);
+  static constexpr int kDerivedM2Offset = offsetof(Data, m2);
+  static constexpr int kDerivedM2InvOffset = offsetof(Data, m2_inv);
+  static constexpr int kDerivedM3Offset = offsetof(Data, m3);
+  static constexpr int kDerivedM3InvOffset = offsetof(Data, m3_inv);
+
+  inline uint32_t m1() const;
+  inline uint32_t m1_inv() const;
+  inline uint32_t m2() const;
+  inline uint32_t m2_inv() const;
+  inline uint32_t m3() const;
+  inline uint32_t m3_inv() const;
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
+  // Generates a hash seed (from --hash-seed or the RNG) and writes it
+  // together with derived secrets into the isolate's hash_seed in
+  // its read-only roots.
+  static void InitializeRoots(Isolate* isolate);
+
+ private:
+  // Pointer into the Data overlaying the ByteArray data (either
+  // points to read-only roots or to kDefaultData).
+  const Data* data_;
+  explicit HashSeed(const Data* data) : data_(data) {}
+
+  // Points to the static constexpr default seed.
+  static const Data* const kDefaultData;
+};
+
+static_assert(std::is_trivially_copyable_v<HashSeed>);
+static_assert(alignof(HashSeed::Data) == alignof(uint64_t));
+
+} // namespace internal
+} // namespace v8
+
+#endif // V8_NUMBERS_HASH_SEED_H_
}
uint32_t NumberDictionaryBaseShape::Hash(ReadOnlyRoots roots, uint32_t key) {
- return ComputeSeededHash(key, HashSeed(roots));
+ return ComputeSeededHash(key, HashSeed(roots).seed());
}
uint32_t NumberDictionaryBaseShape::HashForObject(ReadOnlyRoots roots,
Object other) {
DCHECK(other.IsNumber());
return ComputeSeededHash(static_cast<uint32_t>(other.Number()),
- HashSeed(roots));
+ HashSeed(roots).seed());
}
Handle<Object> NumberDictionaryBaseShape::AsHandle(Isolate* isolate,
// For strings which are array indexes the hash value has the string length
// mixed into the hash, mainly to avoid a hash value of zero which would be
// the case for the string '0'. 24 bits are used for the array index value.
- static const int kArrayIndexValueBits = 24;
+ static constexpr int kArrayIndexValueBits = 24;
+ // Mask for extracting the lower kArrayIndexValueBits of a value.
+ static constexpr uint32_t kArrayIndexValueMask =
+ (1u << kArrayIndexValueBits) - 1;
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+ // Half-width shift used by the seeded xorshift-multiply mixing.
+ static constexpr int kArrayIndexHashShift = kArrayIndexValueBits / 2;
+ // The shift must be at least the half width for the xorshift to be an
+ // involution.
+ static_assert(kArrayIndexHashShift * 2 >= kArrayIndexValueBits,
+ "kArrayIndexHashShift must be at least half of "
+ "kArrayIndexValueBits");
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
static const int kArrayIndexLengthBits =
kBitsPerInt - kArrayIndexValueBits - HashFieldTypeBits::kSize;
return (hash & kDoesNotContainCachedArrayIndexMask) == 0;
}
+extern macro SeedArrayIndexValue(uint32): uint32;
+extern macro UnseedArrayIndexValue(uint32): uint32;
+
const kArrayIndexValueBitsShift: uint32 = kNofHashBitFields;
const kArrayIndexLengthBitsShift: uint32 =
kNofHashBitFields + kArrayIndexValueBits;
return hash.hash_field_type == HashFieldType::kIntegerIndex;
}
-macro MakeArrayIndexHash(value: uint32, length: uint32): NameHash {
- // This is in sync with StringHasher::MakeArrayIndexHash.
+// This is in sync with the private StringHasher::MakeArrayIndexHash without
+// seeding. Do not call directly, use the @export MakeArrayIndexHash wrapper
+// below.
+macro MakeArrayIndexHashRaw(value: uint32, length: uint32): NameHash {
dcheck(length <= kMaxArrayIndexSize);
const one: uint32 = 1;
dcheck(TenToThe(kMaxCachedArrayIndexLength) < (one << kArrayIndexValueBits));
dcheck(IsIntegerIndex(hash));
return hash;
}
+
+// This is in sync with the private StringHasher::DecodeArrayIndexFromHashField
+// without seeding. Do not call directly, use the @export
+// DecodeArrayIndexFromHashField wrapper below.
+macro DecodeArrayIndexFromHashFieldRaw(rawHashField: uint32): uint32 {
+  const hash: NameHash = %RawDownCast<NameHash>(rawHashField);
+  dcheck(ContainsCachedArrayIndex(rawHashField) || IsIntegerIndex(hash));
+  return hash.array_index_value;
+}
+
+// Mirror C++ public StringHasher::MakeArrayIndexHash.
+// When seeded array-index hashing is enabled, the index value is scrambled
+// via SeedArrayIndexValue before being packed into the hash field.
+@export
+macro MakeArrayIndexHash(value: uint32, length: uint32): NameHash {
+  @if(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) {
+    return MakeArrayIndexHashRaw(SeedArrayIndexValue(value), length);
+  }
+  @ifnot(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) {
+    return MakeArrayIndexHashRaw(value, length);
+  }
+}
+
+// Mirror C++ public StringHasher::DecodeArrayIndexFromHashField.
+// Inverse of MakeArrayIndexHash: unscrambles the cached value via
+// UnseedArrayIndexValue when seeded array-index hashing is enabled.
+@export
+macro DecodeArrayIndexFromHashField(rawHashField: uint32): uint32 {
+  const value: uint32 = DecodeArrayIndexFromHashFieldRaw(rawHashField);
+  @if(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) {
+    return UnseedArrayIndexValue(value);
+  }
+  @ifnot(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) {
+    return value;
+  }
+}
return dest.ptr();
}
-uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) {
- // For array indexes mix the length into the hash as an array index could
- // be zero.
- DCHECK_GT(length, 0);
- DCHECK_LE(length, String::kMaxArrayIndexSize);
- DCHECK(TenToThe(String::kMaxCachedArrayIndexLength) <
- (1 << String::kArrayIndexValueBits));
-
- value <<= String::ArrayIndexValueBits::kShift;
- value |= length << String::ArrayIndexLengthBits::kShift;
-
- DCHECK(String::IsIntegerIndex(value));
- DCHECK_EQ(length <= String::kMaxCachedArrayIndexLength,
- Name::ContainsCachedArrayIndex(value));
- return value;
-}
STATIC_ASSERT_FIELD_OFFSETS_EQUAL(HeapNumber::kValueOffset,
Oddball::kToNumberRawOffset);
template <typename Char>
class SequentialStringKey final : public StringTableKey {
public:
- SequentialStringKey(const base::Vector<const Char>& chars, uint64_t seed,
+ SequentialStringKey(const base::Vector<const Char>& chars, const HashSeed seed,
bool convert = false)
: SequentialStringKey(StringHasher::HashSequentialString<Char>(
chars.begin(), chars.length(), seed),
#ifdef ENABLE_SLOW_DCHECKS
uint32_t String::FlatContent::ComputeChecksum() const {
- constexpr uint64_t hashseed = 1;
uint32_t hash;
if (state_ == ONE_BYTE) {
- hash = StringHasher::HashSequentialString(onebyte_start, length_, hashseed);
+ hash = StringHasher::HashSequentialString(onebyte_start, length_,
+ HashSeed::Default());
} else {
DCHECK_EQ(TWO_BYTE, state_);
- hash = StringHasher::HashSequentialString(twobyte_start, length_, hashseed);
+ hash = StringHasher::HashSequentialString(twobyte_start, length_,
+ HashSeed::Default());
}
DCHECK_NE(kChecksumVerificationDisabled, hash);
return hash;
DisallowGarbageCollection no_gc;
uint32_t field = raw_hash_field();
if (ContainsCachedArrayIndex(field)) {
- *index = ArrayIndexValueBits::decode(field);
+ *index = StringHasher::DecodeArrayIndexFromHashField(
+ field, HashSeed(EarlyGetReadOnlyRoots()));
return true;
}
if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) {
bool String::AsIntegerIndex(size_t* index) {
uint32_t field = raw_hash_field();
if (ContainsCachedArrayIndex(field)) {
- *index = ArrayIndexValueBits::decode(field);
+ *index = StringHasher::DecodeArrayIndexFromHashField(
+ field, HashSeed(EarlyGetReadOnlyRoots()));
return true;
}
if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) {
return internalized.ptr();
}
- uint64_t seed = HashSeed(isolate);
+ const HashSeed seed = HashSeed(isolate);
std::unique_ptr<Char[]> buffer;
const Char* chars;
}
// TODO(verwaest): Internalize to one-byte when possible.
SequentialStringKey<Char> key(raw_hash_field,
- base::Vector<const Char>(chars, length), seed);
+ base::Vector<const Char>(chars, length),
+ seed.seed());
// String could be an array index.
if (Name::ContainsCachedArrayIndex(raw_hash_field)) {
- return Smi::FromInt(String::ArrayIndexValueBits::decode(raw_hash_field))
+ return Smi::FromInt(StringHasher::DecodeArrayIndexFromHashField(
+ raw_hash_field, seed))
.ptr();
}
(len == 1 || data[0] != '0')) {
// String hash is not calculated yet but all the data are present.
// Update the hash field to speed up sequential convertions.
- uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(d, len);
+ uint32_t raw_hash_field =
+ StringHasher::MakeArrayIndexHash(d, len, HashSeed(isolate));
#ifdef DEBUG
subject->EnsureHash(); // Force hash calculation.
DCHECK_EQ(subject->raw_hash_field(), raw_hash_field);
namespace {
template <typename Char>
-uint32_t HashString(String string, size_t start, int length, uint64_t seed,
+uint32_t HashString(String string, size_t start, int length,
+ const HashSeed seed,
PtrComprCageBase cage_base,
const SharedStringAccessGuardIfNeeded& access_guard) {
DisallowGarbageCollection no_gc;
DCHECK_IMPLIES(!v8_flags.shared_string_table, !HasHashCode());
// Store the hash code in the object.
- uint64_t seed = HashSeed(EarlyGetReadOnlyRoots());
+ const HashSeed seed = HashSeed(EarlyGetReadOnlyRoots());
size_t start = 0;
String string = *this;
PtrComprCageBase cage_base = GetPtrComprCageBase(string);
if (length <= kMaxCachedArrayIndexLength) {
uint32_t field = EnsureRawHash(); // Force computation of hash code.
if (!IsIntegerIndex(field)) return false;
- *index = ArrayIndexValueBits::decode(field);
+ *index = StringHasher::DecodeArrayIndexFromHashField(
+ field, HashSeed(EarlyGetReadOnlyRoots()));
return true;
}
if (length == 0 || length > kMaxArrayIndexSize) return false;
if (length <= kMaxCachedArrayIndexLength) {
uint32_t field = EnsureRawHash(); // Force computation of hash code.
if (!IsIntegerIndex(field)) return false;
- *index = ArrayIndexValueBits::decode(field);
+ *index = StringHasher::DecodeArrayIndexFromHashField(
+ field, HashSeed(EarlyGetReadOnlyRoots()));
return true;
}
if (length == 0 || length > kMaxIntegerIndexSize) return false;
#include "src/base/logging.h"
#include "src/common/globals.h"
#include "src/handles/handles.h"
+#include "src/numbers/hash-seed.h"
#include "src/objects/function-kind.h"
#include "src/objects/function-syntax-kind.h"
#include "src/objects/script.h"
AstValueFactory* ast_value_factory() const {
return ast_value_factory_.get();
}
- uint64_t hash_seed() const { return hash_seed_; }
+ HashSeed hash_seed() const { return hash_seed_; }
AccountingAllocator* allocator() const { return allocator_; }
const AstStringConstants* ast_string_constants() const {
return ast_string_constants_;
LazyCompileDispatcher* dispatcher() const { return dispatcher_; }
private:
- uint64_t hash_seed_;
+ const HashSeed hash_seed_;
AccountingAllocator* allocator_;
V8FileLogger* v8_file_logger_;
LazyCompileDispatcher* dispatcher_;
const UnoptimizedCompileFlags& flags() const { return flags_; }
// Getters for reusable state.
- uint64_t hash_seed() const { return reusable_state_->hash_seed(); }
+ HashSeed hash_seed() const { return reusable_state_->hash_seed(); }
AccountingAllocator* allocator() const {
return reusable_state_->allocator();
}
uint32_t HeapSnapshotJSONSerializer::StringHash(const void* string) {
const char* s = reinterpret_cast<const char*>(string);
int len = static_cast<int>(strlen(s));
- return StringHasher::HashSequentialString(s, len,
- v8::internal::kZeroHashSeed);
+ return StringHasher::HashSequentialString(s, len, HashSeed::Default());
}
int HeapSnapshotJSONSerializer::to_node_index(const HeapEntry* e) {
inline uint32_t ComputeStringHash(const char* str, int len) {
uint32_t raw_hash_field =
- StringHasher::HashSequentialString(str, len, kZeroHashSeed);
+ StringHasher::HashSequentialString(str, len, HashSeed::Default());
return Name::HashBits::decode(raw_hash_field);
}
}
uint32_t Hash() {
- return StringHasher::HashSequentialString<uint8_t>(
- data_, length_, v8::internal::kZeroHashSeed);
+ return StringHasher::HashSequentialString<uint8_t>(data_, length_,
+ HashSeed::Default());
}
const unsigned char* data_;
#include "src/heap/heap-inl.h" // crbug.com/v8/8499
#include "src/heap/read-only-heap.h"
#include "src/logging/counters-scopes.h"
+#include "src/numbers/hash-seed.h"
#include "src/objects/slots.h"
#include "src/roots/static-roots.h"
#include "src/snapshot/snapshot-data.h"
PostProcessNewObjectsIfStaticRootsEnabled();
if (should_rehash()) {
- isolate()->heap()->InitializeHashSeed();
+ HashSeed::InitializeRoots(isolate());
Rehash();
}
}
DeserializeDeferredObjects();
if (should_rehash()) {
- // Hash seed was initialized in ReadOnlyDeserializer.
+ // The hash seed has already been initialized in ReadOnlyDeserializer, thus
+ // there is no need to call `HashSeed::InitializeRoots(isolate());`.
Rehash();
}
}
// Comment inserted to prevent header reordering.
#include <type_traits>
+#include "src/common/globals.h"
#include "src/objects/name-inl.h"
#include "src/objects/string-inl.h"
#include "src/strings/char-predicates-inl.h"
return String::CreateHashFieldValue(hash, String::HashFieldType::kHash);
}
+// Raw encoding: pack the (possibly pre-seeded) index value and the decimal
+// string length into a hash field of type kIntegerIndex. Seeding, if any,
+// is applied by the public overload before calling this.
+uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, uint32_t length) {
+  // For array indexes mix the length into the hash as an array index could
+  // be zero.
+  // An array index string has at least one digit; this check existed in the
+  // pre-refactor version and was lost in the signature change to uint32_t.
+  DCHECK_GT(length, 0u);
+  DCHECK_LE(length, String::kMaxArrayIndexSize);
+  DCHECK(TenToThe(String::kMaxCachedArrayIndexLength) <
+         (1 << String::kArrayIndexValueBits));
+
+  value <<= String::ArrayIndexValueBits::kShift;
+  value |= length << String::ArrayIndexLengthBits::kShift;
+
+  DCHECK(String::IsIntegerIndex(value));
+  DCHECK_EQ(length <= String::kMaxCachedArrayIndexLength,
+            Name::ContainsCachedArrayIndex(value));
+  return value;
+}
+
+// Raw decoding counterpart of the raw MakeArrayIndexHash above: extracts the
+// value bits without reversing any seeding.
+uint32_t StringHasher::DecodeArrayIndexFromHashField(uint32_t raw_hash_field) {
+  DCHECK(String::ContainsCachedArrayIndex(raw_hash_field) ||
+         String::IsIntegerIndex(raw_hash_field));
+  return String::ArrayIndexValueBits::decode(raw_hash_field);
+}
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+// Scramble the numeric array index value with a 3-round xorshift-multiply
+// keyed by the isolate's hash seed. Invertible; see UnseedArrayIndexValue.
+uint32_t StringHasher::SeedArrayIndexValue(uint32_t value,
+                                           const HashSeed seed) {
+  uint32_t m1 = seed.m1();
+  uint32_t m2 = seed.m2();
+  uint32_t m3 = seed.m3();
+  constexpr uint32_t kShift = Name::kArrayIndexHashShift;
+  constexpr uint32_t kMask = Name::kArrayIndexValueMask;
+  // 3-round xorshift-multiply.
+  uint32_t x = value;
+  x ^= x >> kShift;
+  x = (x * m1) & kMask;
+  x ^= x >> kShift;
+  x = (x * m2) & kMask;
+  x ^= x >> kShift;
+  x = (x * m3) & kMask;
+  x ^= x >> kShift;
+  return x;
+}
+
+// Recover the original index value by applying the modular inverses of the
+// multipliers in reverse round order. Each masked multiply is undone by its
+// inverse (the multipliers are odd, hence invertible), and each xorshift
+// step undoes itself.
+uint32_t StringHasher::UnseedArrayIndexValue(uint32_t value,
+                                             const HashSeed seed) {
+  uint32_t m1_inv = seed.m1_inv();
+  uint32_t m2_inv = seed.m2_inv();
+  uint32_t m3_inv = seed.m3_inv();
+  uint32_t x = value;
+  constexpr uint32_t kShift = Name::kArrayIndexHashShift;
+  constexpr uint32_t kMask = Name::kArrayIndexValueMask;
+  // 3-round xorshift-multiply (inverse).
+  // Xorshift is an involution when kShift is at least half of the value width.
+  x ^= x >> kShift;
+  x = (x * m3_inv) & kMask;
+  x ^= x >> kShift;
+  x = (x * m2_inv) & kMask;
+  x ^= x >> kShift;
+  x = (x * m1_inv) & kMask;
+  x ^= x >> kShift;
+  return x;
+}
+#endif  // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+
+// Public entry point: seeds the index value when seeded array index hashing
+// is compiled in, then performs the raw (unseeded) encoding. The seed
+// parameter is intentionally ignored in the unseeded configuration.
+uint32_t StringHasher::MakeArrayIndexHash(
+    uint32_t value, int length, [[maybe_unused]] const HashSeed seed) {
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  value = SeedArrayIndexValue(value, seed);
+#endif
+  return MakeArrayIndexHash(value, static_cast<uint32_t>(length));
+}
+
+// Public entry point: extract the raw value bits, then reverse the seeding
+// (if compiled in) so callers get back the original numeric index.
+uint32_t StringHasher::DecodeArrayIndexFromHashField(
+    uint32_t raw_hash_field, [[maybe_unused]] const HashSeed seed) {
+  uint32_t value = DecodeArrayIndexFromHashField(raw_hash_field);
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+  value = UnseedArrayIndexValue(value, seed);
+#endif
+  return value;
+}
+
template <typename char_t>
uint32_t StringHasher::HashSequentialString(const char_t* chars_raw, int length,
- uint64_t seed) {
+ const HashSeed seed) {
static_assert(std::is_integral<char_t>::value);
static_assert(sizeof(char_t) <= 2);
using uchar = typename std::make_unsigned<char_t>::type;
int i = 1;
do {
if (i == length) {
- return MakeArrayIndexHash(index, length);
+ return MakeArrayIndexHash(index, length, seed);
}
} while (TryAddArrayIndexChar(&index, chars[i++]));
}
// Perform a regular hash computation, and additionally check
// if there are non-digit characters.
String::HashFieldType type = String::HashFieldType::kIntegerIndex;
- uint32_t running_hash = static_cast<uint32_t>(seed);
+ uint32_t running_hash = static_cast<uint32_t>(seed.seed());
uint64_t index_big = 0;
const uchar* end = &chars[length];
while (chars != end) {
}
// Non-index hash.
- uint32_t running_hash = static_cast<uint32_t>(seed);
+ uint32_t running_hash = static_cast<uint32_t>(seed.seed());
const uchar* end = &chars[length];
while (chars != end) {
running_hash = AddCharacterCore(running_hash, *chars++);
#define V8_STRINGS_STRING_HASHER_H_
#include "src/common/globals.h"
+#include "src/numbers/hash-seed.h"
namespace v8 {
StringHasher() = delete;
template <typename char_t>
static inline uint32_t HashSequentialString(const char_t* chars, int length,
- uint64_t seed);
+ const HashSeed seed);
- // Calculated hash value for a string consisting of 1 to
+ // Calculate the hash value for a string consisting of 1 to
// String::kMaxArrayIndexSize digits with no leading zeros (except "0").
- // value is represented decimal value.
- static uint32_t MakeArrayIndexHash(uint32_t value, int length);
+ //
+ // The entire hash field consists of (from least significant bit to most):
+ // - HashFieldType::kIntegerIndex
+ // - kArrayIndexValueBits::kSize bits containing the hash value
+ // - The length of the decimal string
+ //
+ // When V8_ENABLE_SEEDED_ARRAY_INDEX_HASH is enabled, the numeric value
+ // is scrambled using secrets derived from the hash seed. When it's disabled
+ // the public overloads ignore the seed, whose retrieval should be optimized
+ // away in common configurations.
+ static V8_INLINE uint32_t MakeArrayIndexHash(uint32_t value, int length,
+ const HashSeed seed);
+ // Decode array index value from raw hash field and reverse seeding, if any.
+ static V8_INLINE uint32_t
+ DecodeArrayIndexFromHashField(uint32_t raw_hash_field, const HashSeed seed);
// No string is allowed to have a hash of zero. That value is reserved
// for internal properties. If the hash calculation yields zero then we
V8_INLINE static uint32_t GetHashCore(uint32_t running_hash);
static inline uint32_t GetTrivialHash(int length);
+
+ private:
+ // Raw encode/decode without seeding. Use the public overloads above.
+ static V8_INLINE uint32_t MakeArrayIndexHash(uint32_t value, uint32_t length);
+ static V8_INLINE uint32_t
+ DecodeArrayIndexFromHashField(uint32_t raw_hash_field);
+
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+ // When V8_ENABLE_SEEDED_ARRAY_INDEX_HASH is enabled, the numeric value
+ // will be scrambled with 3 rounds of xorshift-multiply.
+ //
+ // x ^= x >> kShift; x = (x * m1) & kMask; // round 1
+ // x ^= x >> kShift; x = (x * m2) & kMask; // round 2
+ // x ^= x >> kShift; x = (x * m3) & kMask; // round 3
+ // x ^= x >> kShift; // finalize
+ //
+ // To decode, apply the same steps with the modular inverses of m1, m2
+ // and m3 in reverse order.
+ //
+ // x ^= x >> kShift; x = (x * m3_inv) & kMask; // round 1
+ // x ^= x >> kShift; x = (x * m2_inv) & kMask; // round 2
+ // x ^= x >> kShift; x = (x * m1_inv) & kMask; // round 3
+ // x ^= x >> kShift; // finalize
+ //
+ // where kShift = kArrayIndexValueBits / 2, kMask = kArrayIndexValueMask,
+ // m1, m2, m3 (all odd) are derived from the Isolate's rapidhash secrets.
+ // m1_inv, m2_inv, m3_inv (modular inverses) are precomputed so that
+ // UnseedArrayIndexValue can quickly recover the original value.
+ static V8_INLINE uint32_t SeedArrayIndexValue(uint32_t value,
+ const HashSeed seed);
+ // Decode array index value from seeded raw hash field.
+ static V8_INLINE uint32_t UnseedArrayIndexValue(uint32_t value,
+ const HashSeed seed);
+#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
};
// Useful for std containers that require something ()'able.
struct SeededStringHasher {
-  explicit SeededStringHasher(uint64_t hashseed) : hashseed_(hashseed) {}
+  explicit SeededStringHasher(const HashSeed hashseed) : hashseed_(hashseed) {}
  inline std::size_t operator()(const char* name) const;
-  uint64_t hashseed_;
+  const HashSeed hashseed_;
+  // NOTE(review): a const data member deletes this struct's copy assignment;
+  // standard unordered containers require a copy-assignable hasher for
+  // container assignment/swap — confirm no caller relies on that.
};
// Useful for std containers that require something ()'able.
#endif
build_flags_["V8_ENABLE_SANDBOX"] = V8_ENABLE_SANDBOX_BOOL;
build_flags_["DEBUG"] = DEBUG_BOOL;
+#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
+ build_flags_["V8_ENABLE_SEEDED_ARRAY_INDEX_HASH"] = true;
+#else
+ build_flags_["V8_ENABLE_SEEDED_ARRAY_INDEX_HASH"] = false;
+#endif
}
static bool GetFlag(const std::string& name, const char* production) {
auto it = Get().build_flags_.find(name);
// ----------------------------------------------------------------------------
// Hash function.
-static const uint64_t kZeroHashSeed = 0;
-
// Thomas Wang, Integer Hash Functions.
// http://www.concentric.net/~Ttwang/tech/inthash.htm`
inline uint32_t ComputeUnseededHash(uint32_t key) {
size_t GetWireBytesHash(base::Vector<const uint8_t> wire_bytes) {
return StringHasher::HashSequentialString(
reinterpret_cast<const char*>(wire_bytes.begin()), wire_bytes.length(),
- kZeroHashSeed);
+ HashSeed::Default());
}
int NumFeedbackSlots(const WasmModule* module, int func_index) {
--- /dev/null
+/*
+ * rapidhash - Very fast, high quality, platform independant hashing algorithm.
+ * Copyright (C) 2024 Nicolas De Carli
+ *
+ * Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net>
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ * - rapidhash source repository: https://github.com/Nicoshev/rapidhash
+ */
--- /dev/null
+leszeks@chromium.org
+mlippautz@chromium.org
--- /dev/null
+Name: rapidhash
+URL: https://github.com/Nicoshev/rapidhash/blob/master/rapidhash.h
+Version: N/A
+Date: 2024-07-08
+Revision: 588978411df8683777429f729be5213eb1bfd5f3
+License: BSD 2-clause
+License File: LICENSE
+Shipped: Yes
+Security Critical: yes
+
+Description:
+A fast hash function.
+
+This is a fork of upstream, with the parts that we don't need or want
+removed. We do not intend to sync regularly with upstream git. This
+particular version is copied over from Chromium.
+
+Local Modifications:
+- Copied over from Chromium's third_party/rapidhash with all its changes.
+- Removed base/ includes and replaced with V8 versions.
--- /dev/null
+/*
+ * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
+ * Copyright (C) 2024 Nicolas De Carli
+ *
+ * Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net>
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ * - rapidhash source repository: https://github.com/Nicoshev/rapidhash
+ */
+
+#ifndef _THIRD_PARTY_RAPIDHASH_H
+#define _THIRD_PARTY_RAPIDHASH_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <tuple>
+#include <utility>
+
+#include "include/v8config.h"
+#include "src/base/logging.h"
+
+// Chromium has some local modifications to upstream rapidhash,
+// mostly around the concept of HashReaders (including slightly
+// more comments for ease of understanding). Generally, rapidhash
+// hashes bytes without really caring what these bytes are,
+// but often in Chromium, we want to hash _strings_, and strings
+// can have multiple representations. In particular, the WTF
+// StringImpl class (and by extension, String and AtomicString)
+// can have three different states:
+//
+// 1. Latin1 (or ASCII) code points, in 8 bits per character (LChar).
+// 2. The same code points, in 16 bits per character (UChar).
+// 3. Strings actually including non-Latin1 code points, in 16 bits
+// per character (UChar, UTF-16 encoding).
+//
+// The problem is that we'd like to hash case #1 and #2 to actually
+// return the same hash. There are several ways we could deal with this:
+//
+// a) Just ignore the problem and hash everything as raw bytes;
+// case #2 is fairly rare, and some algorithms (e.g. opportunistic
+// deduplication) could live with some false negatives.
+// b) Expand all strings to UTF-16 before hashing. This was the
+// strategy for the old StringHasher (using SuperFastHash),
+// as it naturally worked in 16-bit increments and it is probably
+// the simplest. However, this both halves the throughput of the
+// hasher and incurs conversion costs.
+// c) Detect #2 and convert those cases to #1 (UTF-16 to Latin1
+//    conversion), and apart from that, just hash bytes.
+//
+// b) is used in e.g. CaseFoldingHash, but c) is the one we use the most
+// often. Most strings that we want to hash are 8-bit (e.g. HTML tags), so
+// that makes the common case fast. And for UChar strings, it is fairly fast
+// to make a pre-pass over the string to check for the existence of any
+// non-Latin1 characters. Of course, #1 and #3 can just be hashed as raw
+// bytes, as strings from those groups can never be equal anyway.
+//
+// To support these 8-to-16 and 16-to-8 conversions, and also things like
+// lowercasing on the fly, we have modified rapidhash to be templatized
+// on a HashReader, supplying bytes to the hash function. For the default
+// case of just hashing raw bytes, this is simply reading. But for other
+// conversions, the reader is doing slightly more work, such as packing
+// or unpacking. See e.g. ConvertTo8BitHashReader in string_hasher.h.
+//
+// Note that this reader could be made constexpr if we needed to evaluate
+// hashes at compile-time.
+struct PlainHashReader {
+ // If this is different from 1 (only 1, 2 and 4 are really reasonable
+ // options), it means the reader consumes its input at a faster rate than
+ // would be normally expected. For instance, if it is 2, it means that when
+ // the reader produces 64 bits, it needs to move its input 128 bits
+ // ahead. This is used when e.g. a reader converts from UTF-16 to ASCII,
+ // by removing zeros. The input length must divide the compression factor.
+ static constexpr unsigned kCompressionFactor = 1;
+
+ // This is the opposite of kExpansionFactor. It does not make sense to have
+ // both kCompressionFactor and kExpansionFactor different from 1.
+ // The output length must divide the expansion factor.
+ static constexpr unsigned kExpansionFactor = 1;
+
+ // Read 64 little-endian bits from the input, taking into account
+ // the expansion/compression if relevant. Unaligned reads must be
+ // supported.
+ static inline uint64_t Read64(const uint8_t* p) {
+ uint64_t v;
+ memcpy(&v, p, 8);
+ return v;
+ }
+
+ // Similarly, read 32 little-endian (unaligned) bits. Note that
+ // this must return uint64_t, _not_ uint32_t, as the hasher assumes
+ // it can freely shift such numbers up and down.
+ static inline uint64_t Read32(const uint8_t* p) {
+ uint32_t v;
+ memcpy(&v, p, 4);
+ return v;
+ }
+
+ // Read 1, 2 or 3 bytes from the input, and distribute them somewhat
+ // reasonably across the resulting 64-bit number. Note that this is
+ // done in a branch-free fashion, so that it always reads three bytes
+ // but never reads past the end.
+ //
+ // This is only used in the case where we hash a string of exactly
+ // 1, 2 or 3 bytes; if the hash is e.g. 7 bytes, two overlapping 32-bit
+ // reads will be done instead. Note that if you have kCompressionFactor == 2,
+ // this means that this will only ever be called with k = 2.
+ static inline uint64_t ReadSmall(const uint8_t* p, size_t k) {
+ return (uint64_t{p[0]} << 56) | (uint64_t{p[k >> 1]} << 32) | p[k - 1];
+ }
+};
+
+/*
+ * Likely and unlikely macros.
+ */
+#define _likely_(x) __builtin_expect(x, 1)
+#define _unlikely_(x) __builtin_expect(x, 0)
+
+/*
+ * 64*64 -> 128bit multiply function.
+ *
+ * @param A Address of 64-bit number.
+ * @param B Address of 64-bit number.
+ *
+ * Calculates 128-bit C = *A * *B.
+ */
+inline std::pair<uint64_t, uint64_t> rapid_mul128(uint64_t A, uint64_t B) {
+#if defined(__SIZEOF_INT128__)
+ __uint128_t r = A;
+ r *= B;
+ return {static_cast<uint64_t>(r), static_cast<uint64_t>(r >> 64)};
+#else
+ // High and low 32 bits of A and B.
+ uint64_t a_high = A >> 32, b_high = B >> 32, a_low = (uint32_t)A,
+ b_low = (uint32_t)B;
+
+ // Intermediate products.
+ uint64_t result_high = a_high * b_high;
+ uint64_t result_m0 = a_high * b_low;
+ uint64_t result_m1 = b_high * a_low;
+ uint64_t result_low = a_low * b_low;
+
+ // Final sum. The lower 64-bit addition can overflow twice,
+ // so accumulate carry as we go.
+ uint64_t high = result_high + (result_m0 >> 32) + (result_m1 >> 32);
+ uint64_t t = result_low + (result_m0 << 32);
+ high += (t < result_low); // Carry.
+ uint64_t low = t + (result_m1 << 32);
+ high += (low < t); // Carry.
+
+ return {low, high};
+#endif
+}
+
+/*
+ * Multiply and xor mix function.
+ *
+ * @param A 64-bit number.
+ * @param B 64-bit number.
+ *
+ * Calculates 128-bit C = A * B.
+ * Returns 64-bit xor between high and low 64 bits of C.
+ */
+inline uint64_t rapid_mix(uint64_t A, uint64_t B) {
+ std::tie(A, B) = rapid_mul128(A, B);
+ return A ^ B;
+}
+
+/*
+ * rapidhash main function.
+ *
+ * @param key Buffer to be hashed.
+ * @param len Number of input bytes coming from the reader.
+ * @param seed 64-bit seed used to alter the hash result predictably.
+ * @param secret Triplet of 64-bit secrets used to alter hash result
+ * predictably.
+ *
+ * Returns a 64-bit hash.
+ *
+ * The data flow is separated so that we never mix input data with pointers;
+ *
+ * a, b, seed, secret[0], secret[1], secret[2], see1 and see2 are affected
+ * by the input data.
+ *
+ * p is only ever indexed by len, delta (comes from len only), i (comes from
+ * len only) or integral constants. len is const, so no data can flow into it.
+ *
+ * No other reads from memory take place. No writes to memory take place.
+ */
+template <class Reader = PlainHashReader>
+V8_INLINE uint64_t rapidhash(const uint8_t* p, const size_t len, uint64_t seed,
+ const uint64_t secret[3]) {
+ // For brevity.
+ constexpr unsigned x = Reader::kCompressionFactor;
+ constexpr unsigned y = Reader::kExpansionFactor;
+ DCHECK_EQ(len % y, 0u);
+
+ seed ^= rapid_mix(seed ^ secret[0], secret[1]) ^ len;
+ uint64_t a, b;
+ if (_likely_(len <= 16)) {
+ if (_likely_(len >= 4)) {
+ // Read the first and last 32 bits (they may overlap).
+ const uint8_t* plast = p + (len - 4) * x / y;
+ a = (Reader::Read32(p) << 32) | Reader::Read32(plast);
+
+ // This is equivalent to: delta = (len >= 8) ? 4 : 0;
+ const uint64_t delta = ((len & 24) >> (len >> 3)) * x / y;
+ b = ((Reader::Read32(p + delta) << 32) | Reader::Read32(plast - delta));
+ } else if (_likely_(len > 0)) {
+ // 1, 2 or 3 bytes.
+ a = Reader::ReadSmall(p, len);
+ b = 0;
+ } else {
+ a = b = 0;
+ }
+ } else {
+ size_t i = len;
+ if (_unlikely_(i > 48)) {
+ uint64_t see1 = seed, see2 = seed;
+ do {
+ seed = rapid_mix(Reader::Read64(p) ^ secret[0],
+ Reader::Read64(p + 8 * x / y) ^ seed);
+ see1 = rapid_mix(Reader::Read64(p + 16 * x / y) ^ secret[1],
+ Reader::Read64(p + 24 * x / y) ^ see1);
+ see2 = rapid_mix(Reader::Read64(p + 32 * x / y) ^ secret[2],
+ Reader::Read64(p + 40 * x / y) ^ see2);
+ p += 48 * x / y;
+ i -= 48;
+ } while (_likely_(i >= 48));
+ seed ^= see1 ^ see2;
+ }
+ if (i > 16) {
+ seed = rapid_mix(Reader::Read64(p) ^ secret[2],
+ Reader::Read64(p + 8 * x / y) ^ seed ^ secret[1]);
+ if (i > 32) {
+ seed = rapid_mix(Reader::Read64(p + 16 * x / y) ^ secret[2],
+ Reader::Read64(p + 24 * x / y) ^ seed);
+ }
+ }
+
+ // Read the last 2x64 bits. Note that due to the division by y,
+ // this must be a signed quantity (or the division would become
+ // unsigned, possibly moving the sign bit down into the address).
+ // Similarly for x.
+ a = Reader::Read64(p + (static_cast<ptrdiff_t>(i) - 16) *
+ static_cast<int>(x) / static_cast<int>(y));
+ b = Reader::Read64(p + (static_cast<ptrdiff_t>(i) - 8) *
+ static_cast<int>(x) / static_cast<int>(y));
+ }
+ a ^= secret[1];
+ b ^= seed;
+ std::tie(a, b) = rapid_mul128(a, b);
+ return rapid_mix(a ^ secret[0] ^ len, b ^ secret[1]);
+}
+
+#undef _likely_
+#undef _unlikely_
+
+#endif // _THIRD_PARTY_RAPIDHASH_H
--- /dev/null
+/*
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <https://unlicense.org/>
+ *
+ * author: 王一 Wang Yi <godspeed_china@yeah.net>
+ * contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger,
+ * Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero,
+ * paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314
+ * (Devin), TheOneric
+ */
+
+#ifndef _THIRD_PARTY_RAPIDHASH_SECRET_H
+#define _THIRD_PARTY_RAPIDHASH_SECRET_H 1
+
+#include "rapidhash.h"
+#include "src/base/bits.h"
+
+// Default secret parameters. If we wanted to, we could generate our own
+// versions of these at renderer startup in order to perturb the hash
+// and make it more DoS-resistant (similar to what base/hash.h does),
+// but generating new ones takes a little bit of time (~200 µs on a desktop
+// machine as of 2024), and good-quality random numbers may not be copious
+// from within the sandbox. The secret concept is inherited from wyhash,
+// described by the wyhash author here:
+//
+// https://github.com/wangyi-fudan/wyhash/issues/139
+//
+// The rules are:
+//
+// 1. Each byte must be “balanced”, i.e., have exactly 4 bits set.
+//    (This is trivially done by just precomputing a LUT with the
+// possible bytes and picking from those.)
+//
+// 2. Each 64-bit group should have a Hamming distance of 32 to
+// all the others (i.e., popcount(secret[i] ^ secret[j]) == 32).
+// This is just done by rejection sampling.
+//
+// 3. Each 64-bit group should be prime. It's not obvious that this
+// is really needed for the hash, as opposed to wyrand which also
+// uses the same secret, but according to the author, it is
+// “a feeling to be perfect”. This naturally means the last byte
+// cannot be divisible by 2, but apart from that, is easiest
+// checked by testing a few small factors and then the Miller-Rabin
+// test, which rejects nearly all bad candidates in the first iteration
+// and is fast as long as we have 64x64 -> 128 bit muls and modulos.
+
+static constexpr uint64_t RAPIDHASH_DEFAULT_SECRET[3] = {
+ 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull};
+
+#if !V8_USE_DEFAULT_HASHER_SECRET
+
+namespace detail {
+
+// wyhash's tiny PRNG ("wyrand"): advances *seed by a fixed odd constant and
+// mixes it down to one 64-bit output. Used only to draw candidate secret
+// words during secret generation. The two constants are the same as the
+// first two default-secret words above.
+static inline uint64_t wyrand(uint64_t* seed) {
+  *seed += 0x2d358dccaa6c78a5ull;
+  return rapid_mix(*seed, *seed ^ 0x8bb84b93962eacc9ull);
+}
+
+// Computes (a * b) mod m without a 128-bit multiply, by binary decomposition
+// of b (shift-and-add): for each set bit of b, the current a is added into
+// the result, and a is doubled each iteration, with every addition reduced
+// mod m. Assumes a < m on entry (true for all call sites in this file).
+static inline unsigned long long mul_mod(unsigned long long a,
+                                         unsigned long long b,
+                                         unsigned long long m) {
+  unsigned long long r = 0;
+  while (b) {
+    if (b & 1) {
+      unsigned long long r2 = r + a;
+      // r2 < r detects 64-bit wraparound. Since r, a < m the true sum is
+      // below 2m, so subtracting m (which itself wraps mod 2^64) restores
+      // the correct value before the final reduction.
+      if (r2 < r) r2 -= m;
+      r = r2 % m;
+    }
+    b >>= 1;
+    if (b) {
+      unsigned long long a2 = a + a;
+      // Same wraparound correction for the doubling step.
+      if (a2 < a) a2 -= m;
+      a = a2 % m;
+    }
+  }
+  return r;
+}
+
+// Computes (a ^ b) mod m by square-and-multiply, using mul_mod for each
+// overflow-safe modular multiplication.
+static inline unsigned long long pow_mod(unsigned long long a,
+                                         unsigned long long b,
+                                         unsigned long long m) {
+  unsigned long long r = 1;
+  while (b) {
+    if (b & 1) r = mul_mod(r, a, m);
+    b >>= 1;
+    if (b) a = mul_mod(a, a, m);
+  }
+  return r;
+}
+
+// One round of the Miller-Rabin test: returns 1 if n is a strong probable
+// prime to base a, 0 if a witnesses that n is composite. Expects odd n > 2.
+static unsigned sprp(unsigned long long n, unsigned long long a) {
+  // Factor n - 1 as d * 2^s by stripping trailing zero bits, a byte,
+  // nibble, pair and single bit at a time.
+  unsigned long long d = n - 1;
+  unsigned char s = 0;
+  while (!(d & 0xff)) {
+    d >>= 8;
+    s += 8;
+  }
+  if (!(d & 0xf)) {
+    d >>= 4;
+    s += 4;
+  }
+  if (!(d & 0x3)) {
+    d >>= 2;
+    s += 2;
+  }
+  if (!(d & 0x1)) {
+    d >>= 1;
+    s += 1;
+  }
+  unsigned long long b = pow_mod(a, d, n);
+  if ((b == 1) || (b == (n - 1))) return 1;
+  unsigned char r;
+  // Keep squaring: reaching n - 1 means probable prime; reaching 1 (or 0)
+  // without passing through n - 1 means a nontrivial square root of 1 was
+  // found, so n is composite.
+  for (r = 1; r < s; r++) {
+    b = mul_mod(b, b, n);
+    if (b <= 1) return 0;
+    if (b == (n - 1)) return 1;
+  }
+  return 0;
+}
+
+// Primality test for 64-bit n: rejects n < 2 and even n, then runs
+// Miller-Rabin with the first twelve primes as bases, a base set known to
+// be deterministic for all n < 2^64. The n < 2047 early exit is valid
+// because 2047 is the smallest base-2 strong pseudoprime.
+static unsigned is_prime(unsigned long long n) {
+  if (n < 2 || !(n & 1)) return 0;
+  if (n < 4) return 1;
+  if (!sprp(n, 2)) return 0;
+  if (n < 2047) return 1;
+  if (!sprp(n, 3)) return 0;
+  if (!sprp(n, 5)) return 0;
+  if (!sprp(n, 7)) return 0;
+  if (!sprp(n, 11)) return 0;
+  if (!sprp(n, 13)) return 0;
+  if (!sprp(n, 17)) return 0;
+  if (!sprp(n, 19)) return 0;
+  if (!sprp(n, 23)) return 0;
+  if (!sprp(n, 29)) return 0;
+  if (!sprp(n, 31)) return 0;
+  if (!sprp(n, 37)) return 0;
+  return 1;
+}
+
+} // namespace detail
+
+// Fills secret[0..2] with three 64-bit words derived from `seed`, each
+// satisfying the rules documented above: byte-balanced (rule 1), pairwise
+// Hamming distance of exactly 32 from the earlier words (rule 2), and
+// prime (rule 3). Candidates are drawn with wyrand and rejection-sampled
+// until all checks pass.
+static inline void rapidhash_make_secret(uint64_t seed, uint64_t* secret) {
+  // LUT of all 70 byte values with exactly 4 bits set (rule 1).
+  uint8_t c[] = {15,  23,  27,  29,  30,  39,  43,  45,  46,  51,  53,  54,
+                 57,  58,  60,  71,  75,  77,  78,  83,  85,  86,  89,  90,
+                 92,  99,  101, 102, 105, 106, 108, 113, 114, 116, 120, 135,
+                 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166,
+                 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202,
+                 204, 209, 210, 212, 216, 225, 226, 228, 232, 240};
+  for (size_t i = 0; i < 3; i++) {
+    uint8_t ok;
+    do {
+      ok = 1;
+      // Assemble a candidate word from eight LUT bytes.
+      secret[i] = 0;
+      for (size_t j = 0; j < 64; j += 8)
+        secret[i] |= static_cast<uint64_t>(c[detail::wyrand(&seed) % sizeof(c)])
+                     << j;
+      // Cheap pre-filter for rule 3: even candidates cannot be prime.
+      if (secret[i] % 2 == 0) {
+        ok = 0;
+        continue;
+      }
+      // Rule 2: Hamming distance to every previously chosen word must be 32.
+      for (size_t j = 0; j < i; j++) {
+        if (v8::base::bits::CountPopulation(secret[j] ^ secret[i]) != 32) {
+          ok = 0;
+          break;
+        }
+      }
+      // Rule 3: full primality check (Miller-Rabin, deterministic for 64-bit).
+      if (ok && !detail::is_prime(secret[i])) {
+        ok = 0;
+      }
+    } while (!ok);
+  }
+}
+
+#endif // !V8_USE_DEFAULT_HASHER_SECRET
+
+#endif // _THIRD_PARTY_RAPIDHASH_SECRET_H
--- /dev/null
+'use strict';
+
+// See https://hackerone.com/reports/3511792
+//
+// Builds a large JSON document and parses it. In 'benign' mode the payload
+// is CHN + REP distinct numeric strings, which establishes a timing
+// baseline. In any other mode ("malicious") it is CHN generated numeric
+// strings followed by REP repetitions of a single string — an input
+// crafted (per the report above) to degrade hashing of array-index-like
+// keys during JSON.parse.
+
+const payload = [];
+const val = 1234;
+const MOD = 2 ** 19;
+const CHN = 2 ** 17;
+const REP = 2 ** 17;
+
+if (process.argv[2] === 'benign') {
+  // Distinct consecutive values: no unusual collision structure.
+  for (let i = 0; i < CHN + REP; i++) {
+    payload.push(`${val + i}`);
+  }
+} else {
+  // Chain of values following the report's construction, then many
+  // repetitions of one value.
+  let j = val + MOD;
+  for (let i = 1; i < CHN; i++) {
+    payload.push(`${j}`);
+    j = (j + i) % MOD;
+  }
+  for (let k = 0; k < REP; k++) {
+    payload.push(`${val}`);
+  }
+}
+
+const string = JSON.stringify({ data: payload });
+JSON.parse(string);
--- /dev/null
+'use strict';
+
+// This is a regression test for https://hackerone.com/reports/3511792
+//
+// Runs the fixture twice: once with a benign payload to get a
+// machine-relative baseline, then with the malicious payload under a
+// timeout of 10x that baseline. A hash-flooding regression makes the
+// malicious run blow past the timeout.
+
+require('../common');
+const assert = require('assert');
+const { spawnSync } = require('child_process');
+const { performance } = require('perf_hooks');
+const fixtures = require('../common/fixtures');
+
+const fixturePath = fixtures.path('array-hash-collision.js');
+
+// Baseline: time the benign run.
+const t0 = performance.now();
+const benignResult = spawnSync(process.execPath, [fixturePath, 'benign']);
+const benignTime = performance.now() - t0;
+assert.strictEqual(benignResult.status, 0);
+console.log(`Benign test completed in ${benignTime.toFixed(2)}ms.`);
+
+// Malicious run with a 10x budget. If the timeout kills the child,
+// spawnSync reports a null status, so the assertion below fails.
+const t1 = performance.now();
+const maliciousResult = spawnSync(process.execPath, [fixturePath, 'malicious'], {
+  timeout: Math.ceil(benignTime * 10),
+});
+const maliciousTime = performance.now() - t1;
+console.log(`Malicious test completed in ${maliciousTime.toFixed(2)}ms.`);
+
+assert.strictEqual(maliciousResult.status, 0, 'Hash flooding regression detected: ' +
+  `Benign took ${benignTime}ms, malicious took more than ${maliciousTime}ms.`);
BUILD_ARCH_TYPE=$1
V8_BUILD_OPTIONS=$2
+EXTRA_V8_OPTS="v8_enable_static_roots=false v8_enable_seeded_array_index_hash=true v8_use_default_hasher_secret=false"
+
cd deps/v8 || exit
find . -type d -name .git -print0 | xargs -0 rm -rf
../../tools/v8/fetch_deps.py .
# Use Siphash as added protection against hash flooding attacks.
'v8_use_siphash%': 0,
+ # Enable seeded array index hash.
+ 'v8_enable_seeded_array_index_hash%': 1,
+
# Use Perfetto (https://perfetto.dev) as the default TracingController. Not
# currently implemented.
'v8_use_perfetto%': 0,
['v8_use_siphash==1', {
'defines': ['V8_USE_SIPHASH',],
}],
+ ['v8_enable_seeded_array_index_hash==1', {
+ 'defines': ['V8_ENABLE_SEEDED_ARRAY_INDEX_HASH',],
+ }],
['v8_enable_shared_ro_heap==1', {
'defines': ['V8_SHARED_RO_HEAP',],
}],